* Load the combined dataset, replacing whatever is currently in memory.
use combined_dataset_2024, clear

* Sample restriction: keep rows whose banteammergers flag is the string "False"
* and whose pubscore_normal is not the system missing value.
keep if banteammergers == "False" & pubscore_normal != .

* Drop columns that the live code in the visible part of this file never reads.
drop teamleaderid teamname scorefirstsubmitteddate ///
     publicleaderboardsubmissionid privateleaderboardsubmissionid ///
     isbenchmark medal medalawarddate sourcekernelversionid scoredate ///
     enableteammodels lastsubmissiondate ///
     publicscoreleaderboarddisplay publicscorefullprecision ///
     privatescoreleaderboarddisplay privatescorefullprecision ///
     hostsegmenttitle hasleaderboard evaluationalgorithmismax maxteamsize ///
     banteammergers rewardtype numprizes

/*
*score difference at the end of the contest by rank
bysort competitionid teamid : egen maxpriscoreteam = max(priscore_normal)
bysort competitionid teamid : keep if _n==1
drop if maxpriscoreteam ==.
drop if privateleaderboardrank ==.
sort competitionid maxpriscoreteam
gen aux = -maxpriscoreteam
bys competitionid (aux) : gen rank = _n
drop aux
bys competitionid  : gen k =1 if _n==1

bys competitionid (rank) : gen diff_1_2 = maxpriscoreteam[1] - maxpriscoreteam[2]
bys competitionid (rank) : gen diff_1_3 = maxpriscoreteam[1] - maxpriscoreteam[3]
bys competitionid (rank) : gen diff_1_4 = maxpriscoreteam[1] - maxpriscoreteam[4]
bys competitionid (rank) : gen diff_1_5 = maxpriscoreteam[1] - maxpriscoreteam[5]
bys competitionid (rank) : gen diff_1_6 = maxpriscoreteam[1] - maxpriscoreteam[6]
bys competitionid (rank) : gen diff_1_7 = maxpriscoreteam[1] - maxpriscoreteam[7]
bys competitionid (rank) : gen diff_1_8 = maxpriscoreteam[1] - maxpriscoreteam[8]
bys competitionid (rank) : gen diff_1_9 = maxpriscoreteam[1] - maxpriscoreteam[9]
bys competitionid (rank) : gen diff_1_10 = maxpriscoreteam[1] - maxpriscoreteam[10]
bys competitionid (rank) : gen diff_1_12 = maxpriscoreteam[1] - maxpriscoreteam[12]
bys competitionid (rank) : gen diff_1_13 = maxpriscoreteam[1] - maxpriscoreteam[13]
bys competitionid (rank) : gen diff_1_14 = maxpriscoreteam[1] - maxpriscoreteam[14]
bys competitionid (rank) : gen diff_1_15 = maxpriscoreteam[1] - maxpriscoreteam[15]
bys competitionid (rank) : gen diff_1_30 = maxpriscoreteam[1] - maxpriscoreteam[30]
bys competitionid (rank) : gen diff_1_38 = maxpriscoreteam[1] - maxpriscoreteam[38]
bys competitionid (rank) : gen diff_1_40 = maxpriscoreteam[1] - maxpriscoreteam[40]
*/
/*
keep if priscore_normal ~= .
bys competitionid : keep if _n==1
keep rewardtype rewardquantity numprizes totalteams totalcompetitors totalsubmissions 
estpost sum  rewardquantity  totalteams totalcompetitors totalsubmissions 
esttab, cell("mean sd min max  ") tex


*/

* Submission counters (rows are ordered by id inside every group)

* Per team: running row index, and a 0/1 marker for the team's final row.
bysort teamid (id): gen subnum_jt = _n
bysort teamid (id): gen last_jt = (_n == _N)

* Per team: difference in submissiondate from the team's previous row.
* The lag does not exist on a team's first row, so the result is missing there.
bysort teamid (id): gen daysbetween_jt = submissiondate - submissiondate[_n-1]

* Per competition and submitting user: running row index.
bysort competitionid submitteduserid (id): gen subnum_ijt = _n

* Per competition: running row index, expressed in thousands.
bysort competitionid (id): gen subnum_var_comp = _n / 1000

* Contest progress

* startdate and enddate are the earliest and latest submissiondate among the
* rows of a competition whose isafterdeadline flag is the string "False".
gen subtime_aux = submissiondate if isafterdeadline == "False"
bysort competitionid: egen startdate = min(subtime_aux)
bysort competitionid: egen enddate = max(subtime_aux)

* Share of the startdate-to-enddate window elapsed at this row; rows flagged
* "True" for isafterdeadline are set to 1. The ratio is missing when enddate
* equals startdate (division by zero).
gen progress = cond(isafterdeadline == "True", 1, (submissiondate - startdate) / (enddate - startdate))

* Elapsed time since startdate, counting the start itself as 1, and the same
* quantity multiplied by maxdailysubmissions.
gen dayselapsed = submissiondate - startdate + 1
gen dayselapsed_subs = maxdailysubmissions * dayselapsed

* Ratio of the user's running submission count to dayselapsed_subs.
gen merger_eleg_thresh = subnum_ijt / dayselapsed_subs

* enddate is intentionally left in the data, as in the original script.
drop subtime_aux startdate

* Number of active teams

* aux is 1 on the first row of each team (rows ordered by id) and 0 elsewhere.
* It is left in the dataset afterwards, as in the original script.
bysort teamid (id): gen aux = (_n == 1)

* Within a competition (rows ordered by id) the counter is 1 on the first row
* and grows by aux on every later row. Written as a running sum: the first
* row's own aux is subtracted because that row is fixed at 1 regardless.
bysort competitionid (id): gen teams_var_comp = 1 + sum(aux) - aux[1]

* Express the counter in thousands.
replace teams_var_comp = teams_var_comp / 1000

* FE: integer group identifiers (presumably absorbed as fixed effects by later
* regressions; confirm against the estimation code).
* egen group() numbers each distinct combination of its arguments 1, 2, 3, ...
* NOTE(review): egen may change the sort order of the data, so nothing after
* this block should rely on the ordering left by earlier bysort commands.

* One id per competitionid x submissiondate combination.
egen compday = group(competitionid submissiondate)
* Week-of-year and calendar year of the submission; assumes submissiondate is
* a Stata daily date, TODO confirm.
gen submissionweek_aux = week(submissiondate)
gen submissionyear_aux = year(submissiondate)
* One id per (week, year) pair, then discard the two helper columns.
egen submissionweek = group(submissionweek_aux submissionyear_aux)
drop submissionweek_aux submissionyear_aux
* One id per competitionid x submissionweek combination.
egen compweek = group(competitionid submissionweek)
* One id per competitionid x submitteduserid combination.
egen compuser = group(competitionid submitteduserid)

* Best public score seen strictly before the current row
* Both running maxima start from -5, so -5 is the value on the first row of a
* group. NOTE(review): this assumes pubscore_normal never falls below -5;
* confirm against the normalization.

* Competition level (rows ordered by id within competitionid).
gen max_pubscore_j = -5
bysort competitionid (id): replace max_pubscore_j = max(pubscore_normal[_n-1], max_pubscore_j[_n-1]) if _n > 1

* Team level (rows ordered by id within competitionid and teamid).
gen max_pubscore_jt = -5
bysort competitionid teamid (id): replace max_pubscore_jt = max(pubscore_normal[_n-1], max_pubscore_jt[_n-1]) if _n > 1

* Gap between the competition-level and team-level maxima. A team's first row
* has no earlier team score, so it is assigned the fixed value 5 instead.
bysort competitionid teamid (id): gen diff_max_pubscore = cond(_n > 1, max_pubscore_j - max_pubscore_jt, 5)


* Who made the submission?
* oldmembersubmission is 1 when the submitting user equals userid1 and neither
* id is the system missing value, and 0 otherwise.
* NOTE(review): presumably userid1 is the pre-existing team member, so 0
* answers the original question "did the new member make the submission?";
* confirm against the data dictionary.
gen oldmembersubmission = (userid1 == submitteduserid) & (userid1 != .) & (submitteduserid != .)

/*
*inc75
bys competitionid teamid: egen teammaxpubscore = max(pubscore_normal)
gen pubscore_p75 = .
gen pubscore_p90 = .
levelsof competitionid, local(levels) 
 foreach l of local levels {
 quietly: sum pubscore_normal if competitionid==`l', d
 replace pubscore_p75 = `=r(p75)' if competitionid==`l'
 replace pubscore_p90 = `=r(p90)' if competitionid==`l'
 }
gen inc90=teammaxpubscore> pubscore_p90
gen inc75=teammaxpubscore> pubscore_p75

*reghdfe pubscore_normal teamsize_interim diff_max_pubscore subnum_jt subnum_ijt subnum_jt2 subnum_ijt2  if teamsize<3 & inc75==1, absorb(compdate teamid) vce(cluster teamid)
*reghdfe pubscore_normal teamsize_interim diff_max_pubscore subnum_jt subnum_ijt subnum_jt2 subnum_ijt2  if teamsize<3 & privateleaderboardrank<=50, absorb(compdate teamid) vce(cluster teamid)

*/

* State-variable controls
* Builds polynomial terms of the base controls listed in vars:
*   x2_polx_...  first- and second-order terms
*   x3_polx_...  first-, second- and third-order terms that survive the
*                collinearity screen
* Side effects kept from the original script: local macros vars and llpol,
* and scalar asdf (the number of base controls), remain defined afterwards.
unab vars : subnum_var_comp subnum_jt subnum_ijt progress diff_max_pubscore
di "`vars'"

* polx_1 ... polx_K are working copies of the K base controls.
* scalar(asdf) is used instead of the bare name asdf so that a variable whose
* name is, or abbreviates to, asdf cannot shadow the scalar in expressions.
scalar asdf = 0
foreach k of varlist `vars' {
    scalar asdf = scalar(asdf) + 1
    gen polx_`=scalar(asdf)' = `k'
}

* Second order: one product per unordered pair, indices k <= j.
forvalues k = 1/`=scalar(asdf)' {
    forvalues j = `k'/`=scalar(asdf)' {
        gen polx_`k'_`j' = polx_`k'*polx_`j'
    }
}

* Snapshot of the first- and second-order terms under the x2_ prefix.
unab llpol : polx_*
foreach var in `llpol' {
    gen x2_`var' = `var'
}

* Third order: one product per unordered triple, indices h <= k <= j.
* The previous version looped over all K^3 ordered triples, creating every
* monomial up to six times and leaving the de-duplication to _rmcoll. The
* variable is still named polx_k_j_h, which is the name the full loop
* generated first for each monomial, so the terms keep their earlier names.
forvalues h = 1/`=scalar(asdf)' {
    forvalues k = `h'/`=scalar(asdf)' {
        forvalues j = `k'/`=scalar(asdf)' {
            gen polx_`k'_`j'_`h' = polx_`k'*polx_`j'*polx_`h'
        }
    }
}

* Drop any remaining collinear terms (for example a constant or otherwise
* degenerate product), then snapshot the survivors under the x3_ prefix.
_rmcoll polx*   , forcedrop
foreach var in  `r(varlist)' {
    gen x3_`var' = `var'
}

* Remove the working columns; only the x2_ and x3_ copies are kept.
drop polx*
